For the scatterplot, I would like to see whether there is a relationship between age and weekly working hours. This is our first time publishing a scatterplot on GitHub. Thanks to Naeem for organizing the repository for us.
library("dplyr")
library("ggplot2")
library("readr")
library(data.table)
library(stringr)
library(plotly)
library(foreign)
library(tidyverse)
# Locate the GSS extract. The directory defaults to the original author's
# machine but can be overridden with the GSS_DATA_DIR environment variable,
# so the script can also run from other checkouts.
data_dir <- Sys.getenv("GSS_DATA_DIR", unset = "C:/Users/diana/DataViz")
# Working directory is still set so relative paths resolve as before.
setwd(data_dir)
getwd()
## [1] "C:/Users/diana/DataViz"
# Build the file path from the single directory definition above.
mypath <- file.path(data_dir, "GSS7216_R2.DTA")
# Fail early with a clear message instead of a low-level read.dta() error.
if (!file.exists(mypath)) {
  stop("GSS data file not found: ", mypath, call. = FALSE)
}
GSS <- read.dta(mypath)
# Keep only the rows where year is 2016.
GSS2016 <- GSS %>%
  filter(year == 2016)
dim(GSS2016)
## [1] 2867 5897
# head(GSS2016)
# Keep just the two columns used by the plots: hrs1 and age.
hours_age <- GSS2016 %>%
  select(hrs1, age)
head(hours_age)
## hrs1 age
## 1 50 47
## 2 42 61
## 3 NA 72
## 4 30 43
## 5 5 55
## 6 NA 53
# Static ggplot2 scatterplot with age on the x axis and hrs1 on the y axis.
scatter <- ggplot(hours_age, aes(x = age, y = hrs1)) +
  geom_point()
# Convert the stored plot explicitly instead of relying on the implicit
# last_plot() default, so the result does not depend on hidden global state.
ggplotly(scatter)
# Basic scatterplot using plot_ly.
# The trace type and mode are given explicitly so plotly does not have to
# guess them (and no longer prints "No trace type specified" messages); this
# also matches the styled scatterplot, which sets the same type and mode.
p <- plot_ly(
  data = hours_age,
  x = ~age,
  y = ~hrs1,
  type = "scatter",
  mode = "markers"
)
p
## Warning: Ignoring 1229 observations
# Font shared by both axis titles.
f <- list(family = "Courier New, monospace", size = 18, color = "#7f7f7f")

# Axis settings: a title plus the shared title font for each axis.
x <- list(title = "Age", titlefont = f)
y <- list(title = "Working Hours per Week", titlefont = f)
# Styled scatterplot: small pink markers with a dark red outline, a headline
# placed as a paper-anchored annotation above the plot area, and the axis
# settings held in `x` and `y`.
scatterplot <- hours_age %>%
  plot_ly(
    x = ~age,
    y = ~hrs1,
    type = "scatter",
    mode = "markers",
    marker = list(
      size = 5,
      color = "rgba(255, 182, 193, .9)",
      line = list(color = "rgba(152, 0, 0, .8)", width = 1)
    )
  ) %>%
  layout(
    annotations = list(
      text = "Are people still working at their later age?",
      font = list(size = 19),
      showarrow = FALSE,
      # Horizontally centred, in paper (0-1) coordinates.
      x = 0.5,
      xref = "paper",
      xanchor = "center",
      # Slightly above the top edge of the plotting area.
      y = 1.05,
      yref = "paper"
    ),
    yaxis = y,
    xaxis = x
  )
scatterplot
## Warning: Ignoring 1229 observations